pacman::p_load(tidyverse, data.table, raster, sf)
rm(list=ls())
################################################################################ 

################ Extract yields from raster to geographic unit

shpfolder <- c("../../data/shapefiles/clean/")
shpfile_list <- c("argentina_county","brazil_county","world_country")

crop_list <- c("alfalfa","grass","maize","pasture","rice","soybean","sugarcane","sunflower","wheat")

for (shpfile_name in shpfile_list){

  if(shpfile_name=="argentina_county") {
    region = 'argentina'
    raster_bb = c(-76,-52, -57,-20)
    merge_cols = c('ctry','st','st_id','cty','cty_id')
  } else {
    if(shpfile_name=="brazil_county") {
      region = 'brazil'
      raster_bb = c(-76,-34,-35, 6) 
      merge_cols = c('ctry','st','st_id','cty','cty_id')
    } else {
      region = 'world'
      merge_cols = c("fid","ctry","iso","ctry_aff","iso_aff")
    }
  }
  shpfile = sf::read_sf(dsn=path.expand(shpfolder), layer=shpfile_name)
  
  for (crop in crop_list){
    
    cropfolder = paste0("../../data/agronomy/raw/",crop)
    
    #Unzip if necessary
    if(!dir.exists(cropfolder)) {unzip(paste0(cropfolder,".zip"), exdir = cropfolder)}
    
    #FAO GAEZ raster file
    if(shpfolder!="world_country"){
      raster_file = crop(raster(paste0(cropfolder,"/data.asc")), extent(raster_bb)) 
    } else {
      raster_file = raster(paste0(cropfolder,"/data.asc")) 
    }
    
    #Extract data
    mean_yield <- data.frame(extract(raster_file,  shpfile, fun = mean, na.rm = TRUE, df=TRUE, sp=TRUE))
    median_yield <- data.frame(extract(raster_file,  shpfile, fun = median, na.rm = TRUE, df=TRUE, sp=TRUE))
    max_yield <- data.frame(extract(raster_file,  shpfile, fun = max, na.rm = TRUE, df=TRUE, sp=TRUE))
    setnames(mean_yield,   old = "data" , new = 'mean')
    setnames(median_yield, old = "data" , new = 'median')
    setnames(max_yield,    old = "data" , new = 'max')
    
    #Merge statistics into one crop file
    crop_df = merge.data.frame(mean_yield, median_yield, by=merge_cols)
    crop_df = merge.data.frame(crop_df, max_yield, by=merge_cols)
    crop_df <- crop_df %>% mutate(across(.cols = everything(), toupper))
    crop_df$crop = crop

    #Append crop files into one country file
    if(crop == 'alfalfa') {
      final_df = crop_df
    } else { 
      final_df = rbind(final_df,crop_df)}
    
    #Delete zipped folder
    unlink(cropfolder, recursive=TRUE)

  }
  write.csv(final_df, gzfile(paste0("../../data/agronomy/clean/faogaez_",region,"_extracted.csv.gz")), row.names = FALSE)
}


################ Combine extracted files for different input levels into single country dataframe

for (region in c('argentina','brazil') ){
  
  df = fread(paste0("../../data/agronomy/clean/faogaez_",region,"_extracted.csv.gz"))
  df$int_mean = df$mean
  df$int_median = df$median
  df$int_max = df$max
  df_int = df[,c('cty_id','crop','int_mean','int_median','int_max')]
  df_int = df[,c('cty_id','crop','int_mean')]
  
  df = df_int
  df_1 = rename(df, c("county_id"="cty_id"))
  
  df = fread(paste0("../../data/landuse/clean/geographicunits_",region,".csv.gz"))
  df_2 = df[,-c('land_area_km2','population')]
  
  df = merge.data.frame(df_2, df_1, by='county_id')
  write.csv(df, gzfile(paste0("../../data/agronomy/clean/faogaez_",region,".csv.gz")), row.names = FALSE)
  
}

df_ar = fread("../../data/agronomy/clean/faogaez_argentina.csv.gz")
df_br = fread("../../data/agronomy/clean/faogaez_brazil.csv.gz")
df_br = df_br[,-c('microregion','mesoregion','microregion_id','mesoregion_id','amc_id')]
df_arbr = bind(df_ar,df_br)
write.csv(df_arbr, gzfile("../../data/agronomy/clean/faogaez_argentinabrazil.csv.gz"), row.names = FALSE)






